Source code for hysop.backend.device.codegen.base.statistics

# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import copy
import numpy as np

from hysop.tools.htypes import check_instance
from hysop.tools.units import bytes2str
from hysop.backend.device.opencl.opencl_types import (
    vsizes,
    signed_base_types,
    unsigned_base_types,
    float_base_types,
)

# Operation weights per numpy scalar type: maps a type to a pair
# (weight, category). The weight is the type's size expressed in 4-byte
# words (itemsize / 4), so a 32-bit operation counts as 1.0; the category
# is "IOPS" for integer types and "FLOPS" for floating point and complex
# types. Insertion order is: signed ints, unsigned ints, floats, complexes.
dtype_ops = {
    scalar_type: (np.dtype(scalar_type).itemsize / 4, category)
    for category, scalar_types in (
        (
            "IOPS",
            (
                np.int8,
                np.int16,
                np.int32,
                np.int64,
                np.uint8,
                np.uint16,
                np.uint32,
                np.uint64,
            ),
        ),
        (
            "FLOPS",
            (
                np.float16,
                np.float32,
                np.float64,
                np.complex64,
                np.complex128,
            ),
        ),
    )
    for scalar_type in scalar_types
}


def _fill_dtype_ops():
    """Add string type-name entries (scalar and vector) to ``dtype_ops``.

    For every base type name in ``signed_base_types``,
    ``unsigned_base_types`` and ``float_base_types`` and every vector size
    in ``vsizes``, register ``typename -> (weight, category)`` where
    ``typename`` is the base name, suffixed with the vector size when it is
    not 1, and ``weight`` is ``vsize * size_in_bytes / 4``.

    The base type lists are paired positionally with the byte sizes
    (1, 2, 4, 8) for integers and (2, 4, 8) for floats.
    NOTE(review): this presumes the lists in ``opencl_types`` are ordered by
    increasing size -- confirm there.
    """
    integer_base_types = [signed_base_types, unsigned_base_types]
    ibytes = [1, 2, 4, 8]
    for int_base_types in integer_base_types:
        for itype, size in zip(int_base_types, ibytes):
            for vsize in vsizes:
                typename = itype + ("" if vsize == 1 else str(vsize))
                # True division: the weight is a fraction of a 4-byte word
                # (e.g. 0.25 for a 1-byte scalar), consistent with the numpy
                # entries of dtype_ops. Floor division truncated these to 0.
                dtype_ops[typename] = (vsize * float(size) / 4, "IOPS")
    fbytes = [2, 4, 8]
    for ftype, size in zip(float_base_types, fbytes):
        for vsize in vsizes:
            typename = ftype + ("" if vsize == 1 else str(vsize))
            # Same weighting rule as for integers (see above).
            dtype_ops[typename] = (vsize * float(size) / 4, "FLOPS")


# Populate dtype_ops with the string type-name entries once, at import time.
_fill_dtype_ops()


class WorkStatistics:
    """Counters for memory traffic and per-type operation counts.

    Attributes
    ----------
    global_mem_byte_reads, global_mem_byte_writes:
        Byte counters for global memory loads and stores.
    local_mem_byte_reads, local_mem_byte_writes:
        Byte counters for local memory loads and stores.
    ops_per_type:
        Dictionary mapping a type key to an operation count.
    """

    def __init__(self, stat=None):
        """Create zeroed statistics, or copy the counters of ``stat``.

        When ``stat`` is given it is validated with ``check_instance`` and
        its ``ops_per_type`` dictionary is deep-copied.
        """
        if stat is None:
            self.global_mem_byte_reads = 0
            self.global_mem_byte_writes = 0
            self.local_mem_byte_reads = 0
            self.local_mem_byte_writes = 0
            self.ops_per_type = {}
            return

        check_instance(stat, WorkStatistics)
        self.global_mem_byte_reads = stat.global_mem_byte_reads
        self.global_mem_byte_writes = stat.global_mem_byte_writes
        self.local_mem_byte_reads = stat.local_mem_byte_reads
        self.local_mem_byte_writes = stat.local_mem_byte_writes
        self.ops_per_type = copy.deepcopy(stat.ops_per_type)

    def compute_timed_statistics(self, duration):
        """Return a TimedWorkStatistics built from self and ``duration``."""
        return TimedWorkStatistics(self, duration)

    def global_mem_transactions(self):
        """Return global memory bytes written plus bytes read."""
        return self.global_mem_byte_writes + self.global_mem_byte_reads

    def global_mem_rw_ratio(self):
        """Return global bytes written divided by total global bytes.

        Divides by ``global_mem_transactions()``; callers should check
        ``has_global_mem_transactions()`` first to avoid a zero division.
        """
        written = float(self.global_mem_byte_writes)
        return written / self.global_mem_transactions()

    def global_mem_read_ratio(self):
        """Return global bytes read divided by total global bytes."""
        loaded = float(self.global_mem_byte_reads)
        return loaded / self.global_mem_transactions()

    def local_mem_transactions(self):
        """Return local memory bytes written plus bytes read."""
        return self.local_mem_byte_writes + self.local_mem_byte_reads

    def local_mem_rw_ratio(self):
        """Return local bytes written divided by total local bytes.

        Divides by ``local_mem_transactions()``; callers should check
        ``has_local_mem_transactions()`` first to avoid a zero division.
        """
        written = float(self.local_mem_byte_writes)
        return written / self.local_mem_transactions()

    def local_mem_read_ratio(self):
        """Return local bytes read divided by total local bytes."""
        loaded = float(self.local_mem_byte_reads)
        return loaded / self.local_mem_transactions()

    def total_mem_transactions(self):
        """Return the sum of local and global memory transactions."""
        return self.local_mem_transactions() + self.global_mem_transactions()

    def has_local_mem_transactions(self):
        """Return True when at least one local memory byte was counted."""
        return self.local_mem_transactions() > 0

    def has_global_mem_transactions(self):
        """Return True when at least one global memory byte was counted."""
        return self.global_mem_transactions() > 0

    def __add__(self, rhs):
        """Return a deep copy of self with the counters of ``rhs`` added."""
        check_instance(rhs, WorkStatistics)
        total = copy.deepcopy(self)
        total.global_mem_byte_reads += rhs.global_mem_byte_reads
        total.global_mem_byte_writes += rhs.global_mem_byte_writes
        total.local_mem_byte_reads += rhs.local_mem_byte_reads
        total.local_mem_byte_writes += rhs.local_mem_byte_writes
        merged_ops = total.ops_per_type
        for dtype, count in rhs.ops_per_type.items():
            if dtype in merged_ops:
                merged_ops[dtype] += count
            else:
                merged_ops[dtype] = count
        return total

    def __mul__(self, rhs):
        """Return a deep copy of self with every counter multiplied by ``rhs``.

        ``rhs`` is validated as an ``int`` with ``check_instance``.
        """
        check_instance(rhs, int)
        scaled = copy.deepcopy(self)
        scaled.global_mem_byte_reads *= rhs
        scaled.global_mem_byte_writes *= rhs
        scaled.local_mem_byte_reads *= rhs
        scaled.local_mem_byte_writes *= rhs
        scaled_ops = scaled.ops_per_type
        # Only existing keys are reassigned, so iterating the dict is safe.
        for dtype in scaled_ops:
            scaled_ops[dtype] *= rhs
        return scaled

    def __rmul__(self, lhs):
        """Support ``int * stats`` by delegating to ``__mul__``."""
        check_instance(lhs, int)
        return self.__mul__(lhs)

    def __str__(self):
        """Return a multi-line report of the memory and operation counters.

        The global and local memory lines are emitted only when the
        corresponding transaction count is non-zero.
        """
        ops_lines = [""]
        ops_lines.extend(f"{k}: {v}" for (k, v) in self.ops_per_type.items())
        ops_listing = "\n ".join(ops_lines)

        sections = [":: Work Statistics ::"]
        if self.has_global_mem_transactions():
            sections.append(
                "\n Global memory: load={} store={} total={} rw_ratio={}".format(
                    bytes2str(self.global_mem_byte_reads),
                    bytes2str(self.global_mem_byte_writes),
                    bytes2str(self.global_mem_transactions()),
                    round(self.global_mem_rw_ratio(), 2),
                )
            )
        if self.has_local_mem_transactions():
            sections.append(
                "\n Local memory: load={} store={} total={} rw_ratio={}".format(
                    bytes2str(self.local_mem_byte_reads),
                    bytes2str(self.local_mem_byte_writes),
                    bytes2str(self.local_mem_transactions()),
                    round(self.local_mem_rw_ratio(), 2),
                )
            )
        sections.append(f"\n Operations count: {ops_listing}")
        return "".join(sections)
class TimedWorkStatistics(WorkStatistics):
    """Work statistics associated with an execution duration.

    Copies the counters of an existing WorkStatistics and derives weighted
    operation counts per category and per unit of time from ``dtype_ops``.

    Parameters
    ----------
    workstat:
        Statistics to copy (passed to ``WorkStatistics.__init__``).
    duration:
        Elapsed time used as divisor for all throughputs.
        NOTE(review): presumably in seconds, given the ``ops_per_second``
        naming -- confirm with callers.
    """

    def __init__(self, workstat, duration):
        super().__init__(workstat)
        self.duration = duration
        self._init()

    def ops_per_second(self):
        """Return the dict mapping an operation category to its rate."""
        return self._ops_per_second

    def ops_per_category(self):
        """Return the dict mapping an operation category to its weighted count."""
        return self._ops_per_category

    def global_mem_throughput(self):
        """Return global memory transactions divided by the duration."""
        return self.global_mem_transactions() / self.duration

    def local_mem_throughput(self):
        """Return local memory transactions divided by the duration."""
        return self.local_mem_transactions() / self.duration

    def total_mem_throughput(self):
        """Return total memory transactions divided by the duration."""
        return self.total_mem_transactions() / self.duration

    def _init(self):
        """Compute the per-category operation totals and rates.

        Raises
        ------
        ValueError
            If a key of ``ops_per_type`` is not registered in ``dtype_ops``.
        """
        # Validate every type first so no partial result is computed.
        for dtype in self.ops_per_type:
            if dtype not in dtype_ops:
                msg = f"unknown type {dtype}, valid types are:\n\t{dtype_ops.keys()}."
                raise ValueError(msg)

        # Accumulate weighted operation counts per category (IOPS / FLOPS).
        ops_count = {}
        for dtype, count in self.ops_per_type.items():
            multiplier, op_category = dtype_ops[dtype]
            ops_count[op_category] = ops_count.get(op_category, 0.0) + multiplier * count

        ops_per_second = {
            op_category: op_count / self.duration
            for op_category, op_count in ops_count.items()
        }

        self._ops_per_category = ops_count
        self._ops_per_second = ops_per_second